import pandas as pd
import os
from datasets import Dataset, DatasetDict

# Directory containing the waimai CSV files (absolute, machine-specific path).
data_dir = r'D:\_dell7590_root\sync\1_usb\N1\large_sci.com.ai.large_data\NLP\waimai'

# Read the CSV into a DataFrame first; note the file is the "-test" split
# even though the variables below are named "val".
val_csv_path = os.path.join(data_dir, 'waimai_10k-test.csv')
df_val = pd.read_csv(val_csv_path)

# Wrap the DataFrame as a `datasets.Dataset` and show its summary.
val_ds = Dataset.from_pandas(df_val)
print(val_ds)


def preprocess01(batch):
    """Build the ``label03`` column for one batch of examples.

    Intended for use with a batched ``map`` call: ``batch`` is a mapping
    from column name to a list of per-example values.

    Args:
        batch: Mapping that must contain a ``'label'`` key whose value is an
            iterable of labels.

    Returns:
        A dict with a single key ``'label03'`` holding one formatted string
        per input label, in the same order as ``batch['label']``.

    Raises:
        KeyError: If ``batch`` has no ``'label'`` entry.
    """
    # One output string per label; the example index is not needed, so
    # iterate the labels directly instead of using enumerate().
    return {
        'label03': [f'我是{label}呀！' for label in batch['label']]
    }


# Keep a handle on the untransformed dataset before `val_ds` is rebound.
val_ds_ = val_ds

# Apply preprocess01 in batches of 32 rows; the returned 'label03' values
# become part of the mapped dataset.
val_ds = val_ds_.map(
    preprocess01,
    batched=True,
    batch_size=32,
)

# Show the dataset summary, then the first five rows.
print(val_ds)
print(val_ds[:5])
